First, I’m loading the required libraries & functions
suppressPackageStartupMessages(library(Seurat, lib.loc = "/software/Seuratv4/lib/")) # Seurat v4.4.0, library for single-cell analysis
suppressPackageStartupMessages(library(SeuratObject, lib.loc = "/software/Seuratv4/lib/")) # For compatibility purpose
suppressPackageStartupMessages(library(data.table)) # For writing DE gene file
suppressPackageStartupMessages(library(readxl)) # For reading xlsx files
suppressPackageStartupMessages(library(crayon)) # Just for bolding the console output :D
cat(bold("Seurat"), "version", as.character(packageVersion("Seurat")), "\n")
## Seurat version 4.4.0
cat(bold("SeuratObject"), "version", as.character(packageVersion("SeuratObject")), "\n")
## SeuratObject version 4.1.4
cat(bold("data.table"), "version", as.character(packageVersion("data.table")), "\n")
## data.table version 1.16.4
cat(bold("readxl"), "version", as.character(packageVersion("readxl")), "\n")
## readxl version 1.4.3
# Random seed
set.seed(42)
input_seurat_object <- "./data/Pan_neuro_integrated_FINAL.rds"
input_oxphos_genes <- "./data/Oxphos_genes.xlsx"
input_gene_mapping <- "./data/features.tsv"
input_tf_list <- "./data/FBgg0000745_TF_Flybase.filtered.txt"
output_file_prefix <- "./figures/Figure_XXXXX/"
First step is to read the Seurat objects, previously created
message("Loading Seurat object...")
## Loading Seurat object...
data.seurat <- readRDS(input_seurat_object)
# Filter only CTRL cells
data.seurat <- data.seurat[,data.seurat$orig.ident == "Pan_neuro_control"]
message(ncol(data.seurat), " cells were loaded")
## 11742 cells were loaded
message(nrow(data.seurat), " genes were loaded")
## 23932 genes were loaded
# Prepare
correlation_data <- data.frame(gene = rownames(data.seurat), cor = NA, p = NA, r2 = NA)
rownames(correlation_data) <- correlation_data$gene
# Initialize progress bar
num_genes <- nrow(data.seurat)
pb <- txtProgressBar(min = 0, max = num_genes, style = 3)
## | | | 0%
# Go through all genes
for(i in 1:num_genes){
gene <- rownames(data.seurat)[i]
# Calculate correlation, p-value, and R²
cor_test <- suppressWarnings(cor.test(data.seurat@assays$RNA@data[gene,], data.seurat$AUC_OXPHOS_68_Porcelli, method = "pearson"))
correlation_data[gene, "cor"] <- cor_test$estimate
correlation_data[gene, "p"] <- cor_test$p.value
correlation_data[gene, "r2"] <- cor_test$estimate^2
# Update progress bar
setTxtProgressBar(pb, i)
}
## | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
correlation_data <- correlation_data[-1,]
# Close progress bar
close(pb)
# Sort by correlation
correlation_data <- correlation_data[order(correlation_data$cor, decreasing = T),]
correlation_data
# Read the Gene mapping
data.gene_mapping <- fread(input_gene_mapping, sep = "\t", header = F, data.table = F)
colnames(data.gene_mapping) <- c("Ensembl", "Gene", "Type")
rownames(data.gene_mapping) <- data.gene_mapping$Gene
data.gene_mapping
correlation_data$Ensembl <- data.gene_mapping[correlation_data$gene, "Ensembl"]
rownames(correlation_data) <- correlation_data$Ensembl
correlation_data
# Read the Excel file
data.oxphos <- read_excel(input_oxphos_genes, sheet = 1)
data.oxphos
correlation_data$in_Oxphos_68_Porcelli_list <- correlation_data$Ensembl %in% data.oxphos$Gene_Ensembl_ID
correlation_data
# Read the TF list
data.tf_list <- fread(input_tf_list, sep = "\t", header = F, data.table = F)$V1
correlation_data$is_in_TF_list <- correlation_data$gene %in% data.tf_list
correlation_data
# Write the results
fwrite(correlation_data, file = paste0(output_file_prefix, "gene_expression_correlation_OXPHOS_68_Porcelli_CTRL.tsv"), quote = F, sep = "\t", row.names = F, col.names = T)